{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# import package\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "data": {
      "text/plain": "   x1 x2  y\n0   1  S -1\n1   1  M -1\n2   1  M  1\n3   1  S  1\n4   1  S -1",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>x1</th>\n      <th>x2</th>\n      <th>y</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>S</td>\n      <td>-1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>M</td>\n      <td>-1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>M</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>S</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>S</td>\n      <td>-1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1 = [1,1,1,1,1,2,2,2,2,2,3,3,3,3,3]\n",
    "x2 = ['S','M','M','S','S','S','M','M','L','L','L','M','M','L','L']\n",
    "y = [-1,-1,1,1,-1,-1,-1,1,1,1,1,1,1,1,-1]\n",
    "\n",
    "df = pd.DataFrame({'x1':x1, 'x2':x2, 'y':y})\n",
    "df.head()\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "outputs": [
    {
     "data": {
      "text/plain": "    y\n0  -1\n1  -1\n2   1\n3   1\n4  -1\n5  -1\n6  -1\n7   1\n8   1\n9   1\n10  1\n11  1\n12  1\n13  1\n14 -1",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>y</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>-1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>-1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>-1</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>-1</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>-1</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>13</th>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>-1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Separate the feature columns from the label column; list selectors keep both as DataFrames.\n",
    "X = df.loc[:, ['x1', 'x2']]\n",
    "y = df.loc[:, ['y']]\n",
    "y"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "outputs": [],
   "source": [
    "def nb_fit(X, y):\n",
    "    # 统计类标记的个数\n",
    "    classes = y[y.columns[0]].unique()\n",
    "    # print(classes)\n",
    "    # 统计各个类标记的的数量\n",
    "    class_count = y[y.columns[0]].value_counts()\n",
    "    # 类标记的先验概率\n",
    "    class_prior = class_count / len(y)\n",
    "    prior = dict()\n",
    "    for col in X.columns:\n",
    "        for j in classes:\n",
    "            p_x_y = X[(y==j).values][col].value_counts()\n",
    "            for i in p_x_y.index:\n",
    "                prior[(col, i, j)] = p_x_y[i]/class_count[j]\n",
    "    return classes, class_count, class_prior, prior"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "outputs": [
    {
     "data": {
      "text/plain": "(array([-1,  1], dtype=int64),\n  1    9\n -1    6\n Name: y, dtype: int64,\n  1    0.6\n -1    0.4\n Name: y, dtype: float64,\n {('x1', 1, -1): 0.5,\n  ('x1', 2, -1): 0.3333333333333333,\n  ('x1', 3, -1): 0.16666666666666666,\n  ('x1', 3, 1): 0.4444444444444444,\n  ('x1', 2, 1): 0.3333333333333333,\n  ('x1', 1, 1): 0.2222222222222222,\n  ('x2', 'S', -1): 0.5,\n  ('x2', 'M', -1): 0.3333333333333333,\n  ('x2', 'L', -1): 0.16666666666666666,\n  ('x2', 'M', 1): 0.4444444444444444,\n  ('x2', 'L', 1): 0.4444444444444444,\n  ('x2', 'S', 1): 0.1111111111111111})"
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit on the toy data; the cell displays the returned tuple\n",
    "# (classes, class_count, class_prior, prior).\n",
    "nb_fit(X,y)\n",
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}